*---------------------------------------------------------------------------------------------
// Stata do-file for
// Tober, T.
// 'European Institutional Integration, Trade Unions and Income Inequality'
// The Socio-Economic Review
// Main tables and figures
*---------------------------------------------------------------------------------------------

* Run under Stata 15 semantics and disable output paging so the do-file
* executes without pausing.
version 15
set more off

*---------------------------------------------------------------------------------------------
// Prepare data and test for stationarity
*---------------------------------------------------------------------------------------------

* load data
* (-clear- is the documented option of -use- for discarding the data in memory;
*  -replace- is not a documented option of -use-)
use "maindata.dta", clear

* xtset
* -country- is a string; -encode- creates the numeric, value-labelled panel
* identifier -cntry- that -xtset- requires. Panel = country, time = year.
encode country, gen(cntry)
xtset cntry year

* linear and quadratic time trend (year counted from 1945)
gen yr = year - 1945
gen yr_sq = yr*yr

* rescale in place by a factor of 100:
* top income shares, trade openness, share of employment
foreach v of varlist p90p100 p99p100 open semp {
    replace `v' = `v'*100
}

* income ratios: keep the originals and store the x100 version as <name>trans
foreach ratio of varlist p50p10 p90p10 p90p50 {
    gen `ratio'trans = `ratio'*100
}

* GDP per capita (in thousands)
replace gdppc = gdppc/1000

* fill gaps in totden by linear interpolation over year, country by country
bys cntry: ipolate totden year, gen(totden2)

* lagged five-year average of interpolated totden: mean of the five preceding
* observations within country (the current observation is NOT included).
* Subscripts refer to observation positions after sorting by year, so the
* result is missing for the first five observations of every country.
bysort cntry (year): gen totden_avg = ///
    (totden2[_n-1] + totden2[_n-2] + totden2[_n-3] + totden2[_n-4] + totden2[_n-5])/5

* Im-Pesaran-Shin panel unit-root tests for both top income shares:
* first without, then with a linear time trend
foreach share in p90p100 p99p100 {
    xtunitroot ips `share'
}
foreach share in p90p100 p99p100 {
    xtunitroot ips `share', trend
}

* Kao panel cointegration tests: each top income share against the
* main regressors used in the models below
local regressors eurii totden_avg barglev open gdppc semp yr_sch cpg_sw2014
foreach share in p90p100 p99p100 {
    xtcointtest kao `share' `regressors'
}

*---------------------------------------------------------------------------------------------
// Table 1
*---------------------------------------------------------------------------------------------

* All Table 1 models are fixed-effects regressions estimated with -xtscc-
* (user-written command), option lag(10), and share the same core
* specification: the eurii x totden_avg interaction plus baseline controls.
local core   c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014
local extra  femlfp finreform_n toptax

* Estimation order (identical to the table):
*   Models 1-4: dependent variable p90p100
*   Models 5-8: dependent variable p99p100
* Within each outcome:
*   1st/2nd model: linear + quadratic trend (yr yr_sq), without/with extra controls
*   3rd/4th model: year dummies (i.year),           without/with extra controls
foreach share in p90p100 p99p100 {
    foreach timefx in "yr yr_sq" "i.year" {
        xtscc `share' `core' `timefx', fe lag(10)
        xtscc `share' `core' `extra' `timefx', fe lag(10)
    }
}

*---------------------------------------------------------------------------------------------
// Figure 2 (paper-version produced with R; R code available upon request)
*--------------------------------------------------------------------------------------------- 

* Re-estimate Table 1 Model 1 (p90p100) and Model 5 (p99p100), then plot the
* predictions over totden_avg = 0, 10, ..., 100 at eurii = 10 and eurii = 100,
* with the remaining covariates held at their means.
local rhs c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014 yr yr_sq
foreach share in p90p100 p99p100 {
    xtscc `share' `rhs', fe lag(10)
    margins, at(totden_avg=(0(10)100) eurii=(10 100)) atmeans noatlegend post
    marginsplot
}

*---------------------------------------------------------------------------------------------
// Table 2 
*--------------------------------------------------------------------------------------------- 

* Marginal effect of totden_avg at eurii = 10 and eurii = 100 (other covariates
* at their means), based on Table 1 Model 1 (p90p100) and Model 5 (p99p100).
local rhs c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014 yr yr_sq
foreach share in p90p100 p99p100 {
    xtscc `share' `rhs', fe lag(10)
    margins, dydx(totden_avg) at(eurii=(10 100)) atmeans post
}

*---------------------------------------------------------------------------------------------
// Table 3 (see Models 1 and 2 below)
*--------------------------------------------------------------------------------------------- 

* Same core specification as Table 1, applied to the rescaled income ratios.
local core c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014

* Estimation order (identical to the table):
*   Models 3-4: p50p10trans
*   Models 5-6: p90p10trans
*   Models 7-8: p90p50trans
* For each ratio the first model uses the linear + quadratic trend (yr yr_sq),
* the second uses year dummies (i.year).
foreach ratio in p50p10trans p90p10trans p90p50trans {
    foreach timefx in "yr yr_sq" "i.year" {
        xtscc `ratio' `core' `timefx', fe lag(10)
    }
}

*---------------------------------------------------------------------------------------------
// Figure 3 (paper-version produced with R; R code available upon request)
*---------------------------------------------------------------------------------------------

* Re-estimate Table 3 Model 6 (p90p10trans) and Model 8 (p90p50trans), both with
* year dummies, then plot the marginal effect of totden_avg over
* eurii = 0, 10, ..., 100 (other covariates at their means) with a zero
* reference line.
local rhs c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014 i.year
foreach ratio in p90p10trans p90p50trans {
    xtscc `ratio' `rhs', fe lag(10)
    margins, dydx(totden_avg) at(eurii=(0(10)100)) atmeans post
    marginsplot, yline(0)
}

*---------------------------------------------------------------------------------------------
// Table 3 continued (Models 1 and 2)
*--------------------------------------------------------------------------------------------- 

* Load the multiply-imputed dataset, discarding the main data in memory.
* (-clear- is the documented option of -use-; -replace- is not a documented
*  option of -use-)
use "midata.dta", clear

* encode country variable
* (numeric, value-labelled panel identifier needed by -mi xtset- below)
encode country, gen(cntry)

* time trends (linear and quadratic, year counted from 1945)
gen yr = year - 1945
gen yr_sq = yr*yr

* recode gini market
* Rescales by 100 every variable stored between _1_gini_mkt and _100_gini_mkt
* in dataset order.
* NOTE(review): presumably these are exactly the 100 imputed versions of
* gini_mkt and nothing else lies between them - confirm with -describe-.
foreach var of varlist _1_gini_mkt-_100_gini_mkt {
replace `var' = `var'*100
}

* recode trade openness
replace open = open*100

* recode share of employment
replace semp = semp*100

* recode GDP per capita (in thousands)
replace gdppc = gdppc/1000

* interpolate
* (linear interpolation of totden over year, country by country)
bys cntry: ipolate totden year, gen(totden2)

* five year averages
* Mean of the five preceding observations within country (current observation
* excluded); missing for the first five observations of every country.
bysort cntry (year): gen totden_avg = (totden2[_n-1] + totden2[_n-2] + totden2[_n-3] + totden2[_n-4] + totden2[_n-5])/5

* drop NAs
* Removes every observation with a system-missing (.) value on any regressor.
* Extended missing values (.a-.z) are not matched by "== ." and are kept here.
foreach var of varlist eurii totden_avg barglev open gdppc semp yr_sch cpg_sw2014{
drop if `var' == .
}

* xtset
* Declare the panel structure for the mi data, then clear any survival-time
* (st) settings.
* NOTE(review): the reason for -mi stset, clear- is not visible in this file.
mi xtset cntry year
mi stset, clear

* Table 3, Models 1 and 2: gini_mkt regressed on the core specification using
* the multiply-imputed data.
*   Model 1: linear + quadratic trend (yr yr_sq)
*   Model 2: year dummies (i.year)
* -mi estimate, cmdok- is needed because -xtscc- is not an officially
* supported mi estimation command. After each model, -mibeta- is run on the
* same regressors plus country dummies (i.cntry) to get the adj. R-squared.
local core c.eurii##c.totden_avg barglev open gdppc semp yr_sch cpg_sw2014
foreach timefx in "yr yr_sq" "i.year" {
    mi estimate, cmdok: xtscc gini_mkt `core' `timefx', fe lag(10)
    mibeta gini_mkt `core' `timefx' i.cntry
}
